load("fig5_input_image.rda")

library(tidyverse)
library(lubridate)
library(splitstackshape)
library(scales)
library(grid)

sample.times <- read_csv("sample_times.csv")

sample.times$new.tip <- tip.renaming$new.label[match(sample.times$sequenceName, tip.renaming$tip.name)]
sample.times$new.tip[is.na(sample.times$new.tip)] <- sample.times$sequenceName[is.na(sample.times$new.tip)]
sample.times$split.patient <- sapply(sample.times$new.tip, function(x){
  unlist(strsplit(x, "_"))[1]
})

hosts <- unique(sample.times$split.patient)
hosts <- hosts[order(hosts)]

hospital.stays$split.host <- as.character(hospital.stays$split.host)
icu.stays$split.host <- as.character(icu.stays$split.host)

first <- T
ct <- read.csv("posterior_collapsed_trees/Collapsed_Tree_Posterior_ds_10percol_1.csv", stringsAsFactors = F)
host.set <- unique(ct$hosts)

row.list <- list()
plaus.list <- list()
row.gap.list <- list()

for(i in 1:100){
  # one per tree
  
  ct <- read.csv(paste0("posterior_collapsed_trees/Collapsed_Tree_Posterior_ds_10percol_",i,".csv"), stringsAsFactors = F)
  spt <- read.csv(paste0("posterior_splits/Splits_Posterior_ds_10percol_",i,".csv"), stringsAsFactors = F)
  
  spt$tip.times <- as.numeric(sample.times$days.since.start[match(spt$tip, sample.times$new.tip)])
  
  parents.vector <- vector() 
  gap.parents.vector <- vector()
  plausibility.vector <- vector()
  
  for(host in host.set){
    # each host in each tree
    
    lines <- which(ct$hosts == host)
    parents <- ct$parent.hosts[lines]
    
    plausibility <- vector()
    host <- vector()
    
    for(line in lines){
      
      done <- F
      child.split <- ct$unique.splits[line]
      parent.split <- ct$parent.splits[line]
      parent.host <- ct$parent.hosts[line]
      
      if(parent.host=="unassigned_region"){
        
        grandparent.host <- ct$parent.hosts[which(ct$unique.splits==parent.split)]
        if(grandparent.host=="root"){
          plausibility <- c(plausibility, "E")
          done <- T
        } else {
          parent.host <- grandparent.host
        }
      } 
      
      parent.hosts <- c(parent.hosts, parent.host)
      
      if(!done){
        if(startsWith(child.split, "T")){
          
          time.of.earliest.tip <- min(spt$tip.times[which(spt$subgraph==child.split)])
          
          time.of.earliest.parent.sample <- min(spt$tip.times[which(spt$host==parent.host)])
          
          parent.icu.admissions <- as.vector(timings[which(timings$study_no==substr(parent.host,1, 4)), c("icu_adm_d_1", "icu_adm_d_2", "icu_adm_d_3", "icu_adm_d_4")])
          parent.icu.discharges <- as.vector(timings[which(timings$study_no==substr(parent.host,1, 4)), c("icu_disc_d_1", "icu_disc_d_2", "icu_disc_d_3", "icu_disc_d_4")])
          
          date.differences <- time.of.earliest.parent.sample - parent.icu.admissions
          date.differences <- date.differences[!is.na(date.differences)]
          
          # if the parent was positive at their first ICU admission, then the transmission is impossible (if it happened in the hospital) if the parent's hospital admission date is
          # after the child's earliest sample time
          
          # if the parent was first positive at a later ICU admission, then the transmission is impossible (if it happened in the hospital) if end of a previous parent ICU stay is
          # after the child's earliest sample time
          
          # otherwise the transmission is impossible if the parent's first sample date is after the child's
          
          if(any(abs(date.differences) <= 1)){
            index <- which(abs(date.differences) <= 1)
            if(length(index)>1){
              stop("Multiple matches?")
            }
            if(index==1){
              parent.latest.date <- timings[which(timings$study_no==substr(parent.host,1, 4)), "hosp_adm_d_1"]
            } else {
              icu.column.header <- paste0("icu_adm_d_",index-1)
              parent.latest.date <- timings[which(timings$study_no==substr(parent.host,1, 4)), icu.column.header]
            }
          } else {
            parent.latest.date <- time.of.earliest.parent.sample
          }
          
          if(parent.latest.date > time.of.earliest.tip){
            plausibility <- c(plausibility, "N")
          } else {
            plausibility <- c(plausibility, "Y")
          }
        } else {
          if(startsWith(parent.split, "T")){
            plausibility <- c(plausibility, "N")
          } else {
            plausibility <- c(plausibility, "Y")
          }
        }
      }
      
    }
    if(all(plausibility=="E")){
      plausibility.vector <- c(plausibility.vector, "E")
    } else if(any(plausibility=="N")){
      plausibility.vector <- c(plausibility.vector, "N")
    } else {
      plausibility.vector <- c(plausibility.vector, "A")
    }
    
    recorded.parent.hosts <- unique(parent.hosts)
    recorded.parent.hosts <- recorded.parent.hosts[which(recorded.parent.hosts!="unassigned_region")]
    if(length(recorded.parent.hosts)==0){
      gap.parents.vector <- c(gap.parents.vector, "root")
    } else if(length(recorded.parent.hosts)==1){
      gap.parents.vector <- c(gap.parents.vector, recorded.parent.hosts)
    } else {
      gap.parents.vector <- c(gap.parents.vector, "several")
    }
    
    parents <- unique(parents)
    proper.parents <- parents[which(parents!="unassigned_region")]
    if(length(proper.parents)>1){
      parents.vector <- c(parents.vector, "several")
    } else if(length(proper.parents)==1) {
      parents.vector <- c(parents.vector, proper.parents)
    } else {
      parents.vector <- c(parents.vector, "unassigned_region")
    }
  }
  row.list[[i]] <- parents.vector
  row.gap.list[[i]] <- gap.parents.vector
  plaus.list[[i]] <- plausibility.vector
}

p.summary.table <- do.call(rbind, plaus.list)

p.summary.table <- as_tibble(p.summary.table)
colnames(p.summary.table) <- host.set

p.summary.table <- p.summary.table[,which(startsWith(host.set, "T"))]

host.set <- host.set[which(startsWith(host.set, "T"))]

another.plaus.list <- list()

for(host in host.set){
  row <- p.summary.table[host]
  counts.vector <- vector()
  for(host2 in c("E", "A", "N")){
    counts.vector <- c(counts.vector, length(which(row==host2)))
  }
  another.plaus.list[[host]] <- counts.vector
}

summary.table.3 <- do.call(rbind, another.plaus.list) 
summary.table.3 <- as_tibble(summary.table.3) %>% add_column(infectee = host.set)
colnames(summary.table.3) <- c("All.external.source", "All.possible", "Some.impossible", "infectee")

stacked <- gather(summary.table.3, variable, value, 1:3)
stacked <- stacked[stacked$value > 0,]

stacked$colo <- sapply(stacked$infectee, function(x) gsub("T", "C", x))

single.sampled <- read_csv("trace_colonisation_patients.csv") %>% pull(id)

stacked$single.instance <-stacked$infectee %in% single.sampled

stacked <- stacked[order(stacked$colo),]
stacked$colo <- factor(stacked$colo, levels = c(unique(stacked$colo[which(!stacked$single.instance)]), 
                                                unique(stacked$colo[which(stacked$single.instance)])))

ggplot(stacked, aes(colo)) + 
  geom_rect(xmin=0, xmax=27.5, ymin=-5, ymax=105, fill='gray35', alpha=0.01) +
  geom_bar(aes(weight=value/100, fill=variable)) + 
  theme_bw() +
  xlab("Recipient colonisation") + 
  ylab("Posterior support") +
  scale_fill_manual(values=c("#b3cde3",  "#ccebc5", "#fbb4ae"), name="Inferred ancestors", labels=c("All external sources", "Some internal sources,\nall possible", "Some internal sources,\nsome impossible")) + 
  guides(fill=guide_legend(keyheight=2)) + 
  theme(axis.text.x = element_text(angle = 90, vjust=1)) +
  geom_label(label="Non-trace", y=1.025, x=14, hjust=0.5) +
  geom_label(label="Trace", y=1.025, x=44, hjust=0.5) +
  theme(legend.key.height = unit(2.5,"line"), axis.text.x = element_text(vjust=0.5), panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggsave("Figure5S1.pdf", width=20, height=10)

reduced.stacked <- stacked[which(!(stacked$single.instance)),]

ggplot(reduced.stacked, aes(colo)) + 
  geom_bar(aes(weight=value/100, fill=variable)) + 
  theme_bw() +
  xlab("Recipient colonisation") + 
  ylab("Posterior support") +
  scale_fill_manual(values=c("#b3cde3",  "#ccebc5", "#fbb4ae"), name="Inferred ancestors", labels=c("All external sources", "Some internal sources,\nall possible", "Some internal sources,\nsome impossible")) + 
  guides(fill=guide_legend(keyheight=2)) + 
  theme(axis.text.x = element_text(angle = 90, vjust=1)) +
  theme(legend.key.height = unit(2.5,"line"), axis.text.x = element_text(vjust=0.5), panel.grid.major = element_blank(), panel.grid.minor = element_blank())


ggsave("Figure5.pdf", width=10, height=6)
